In this Notebook, the german cellular networks are analysed using the Mozilla mls data.
There are some caveats here: This analysis is based on the correctness of the mls data, which probably also contains old or deprecated cells. Furthermore, I am no expert on cellular network technology, so please let me know about errors.
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np
import gzip
import urllib.request
import io
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
The data is downloaded here. Furthermore, the data is filtered to only contain german LTE cells.
today = datetime.date.today().strftime('%Y-%m-%d')
baseURL = "https://d3r3tk5171bc5t.cloudfront.net/export/"
filename = "MLS-full-cell-export-"+today+"T000000.csv.gz"
outFilePath = datetime.date.today().strftime('%Y%m%d')+'.csv.gz'
#with open(outFilePath, 'wb') as outfile:
# #outfile.write(gzip.decompress(response.read()))
# outfile.write
#cells = pd.read_csv('mls20181201.csv')
response = urllib.request.urlopen(baseURL+filename)
cells = pd.read_csv(io.BytesIO(response.read()), compression='gzip')
import math
# derived from the Java version explained here: http://wiki.openstreetmap.org/wiki/Mercator
RADIUS = 6378137.0 # in meters on the equator
def lat2y(a):
return math.log(math.tan(math.pi / 4 + math.radians(a) / 2)) * RADIUS
def lon2x(a):
return math.radians(a) * RADIUS
def addWebMercartor(tab):
tempTab = tab.assign(lon6 = tab.lon.apply(lon2x))
tempTab = tempTab.assign(lat6 = tab.lat.apply(lat2y))
return tempTab
# add some more colums, readable time and tower ids
def addInfo(df):
df['eNB'] = df['cell']//(256)
df['sector']=df['cell']%256
df['updatedP'] = pd.to_datetime(df['updated'], unit="s")
df['createdR'] = pd.to_datetime(df.created, unit='s')
df.updated = pd.to_datetime(df.updated, unit='s')
addInfo(cells)
cells = addWebMercartor(cells)
cellsLTE = cells.query('radio=="LTE"')
cellsDeLte = cellsLTE.query('mcc==262 and lon>5 and lon<15.5 and lat>47 and lat<55.5')
#cellsFR = cells.query('mcc==208 and radio=="LTE" and lon>5 and lon<15.5 and lat>47 and lat<55.5')
#del cells
#cellsDE = cells[cells.mcc==262]
#cellsDeLte = cellsDE[cellsDE.radio=='LTE']
# create one
def filterAndGroup(net, samples, timeU):
testdelta = datetime.timedelta(1)
testdelta2 = datetime.timedelta(30)
testnew = datetime.datetime.today()-testdelta2
tmp = cellsDeLte.query('net==@net and samples>@samples and updated>@timeU and eNB>9999').groupby('eNB').agg({'lat':np.mean, 'lon':np.mean,'lat6':np.mean, 'lon6':np.mean, 'samples':np.sum, 'created':np.min,'createdR':np.min, 'updated':np.max, 'sector':np.count_nonzero})
#tmp['createdR'] = pd.to_datetime(tmp.created, unit='s')
#tmp.updated = pd.to_datetime(tmp.updated, unit='s')
tmp['id'] = tmp.index.values
return tmp.query('updated-createdR >= @testdelta or createdR>@testnew')
timedec = datetime.datetime(2018,11,20)
#cutoff = datetime.datetime(2018,1,1).timestamp()
cutoff = pd.Timestamp(2018,1,1)
dt = filterAndGroup(1,3,cutoff)
vf = filterAndGroup(2,3,cutoff)
o2 = filterAndGroup(3,3,cutoff)
Now, let's look at the data. First, the development of the networks over time is analysed.
print('Number of towers')
print('o2\t'+str(len(o2))+'\nTelekom\t'+str(len(dt))+'\nVF\t'+(str(len(vf))))
print('Number of cells')
print('o2\t '+str(len(cellsDeLte.query('net==3')))+'\nTelekom\t'+str(len(cellsDeLte.query('net==1')))+'\nVF\t '+(str(len(cellsDeLte.query('net==2')))))
now = int(datetime.datetime.now().strftime('%s'))
timepoints = []
start = datetime.datetime(2017,1,1)
#step = datetime.timedelta(7)
now = datetime.datetime.now()
#for i in range(start, now, step):
# timepoints.append(i)
#tpH = pd.to_datetime(timepoints, unit="s")
timepoints = pd.date_range(start, now, freq="w")
o2.createdR.hist(alpha=0.5)
dt.createdR.hist(alpha=0.5)
vf.createdR.hist(alpha=0.5)
plt.show()
towers = pd.DataFrame(index=timepoints, columns=['Telekom', 'Vodafone', 'O2'])
for i in timepoints:
upCheck = i-datetime.timedelta(days=365)
numberDT= len(dt.query('createdR<@i and updated>@upCheck'))
numberVF= len(vf.query('createdR<@i and updated>@upCheck'))
numberO2= len(o2.query('createdR<@i and updated>@upCheck'))
towers.loc[i]=[numberDT, numberVF, numberO2]
towers
n_cells = pd.DataFrame(index=timepoints, columns=cellsDeLte.net.unique())
for i in timepoints:
upCheck = i-datetime.timedelta(days=365)
for j in cellsDeLte.net.unique():
n_cells.loc[i,j]= len(cellsDeLte.query('net==@j and createdR<@i and updated>@upCheck'))
#numberVF= len(cellsDeLte.query('net==2 and createdR<@i and updated>@upCheck'))
#numberO2= len(cellsDeLte.query('net==3 and createdR<@i and updated>@upCheck'))
#cells.loc[i]=[numberDT, numberVF, numberO2]
towersNew = pd.DataFrame(index=timepoints, columns=['Telekom', 'Vodafone', 'O2'])
for i in timepoints:#[38:]:
start = i-datetime.timedelta(days=7)
numberDT = len(dt.query('createdR < @i and createdR > @start'))
numberVF = len(vf.query('createdR < @i and createdR > @start'))
numberO2 = len(o2.query('createdR < @i and createdR > @start'))
towersNew.loc[i]=[numberDT, numberVF, numberO2]
towersNew
towersNewConsolidated = towersNew[towersNew.index > pd.Timestamp(2018,4,14)]
cellsNew = pd.DataFrame(index=timepoints, columns=cellsDeLte.net.unique())
for i in timepoints:#[38:]:
start = i-datetime.timedelta(days=7)
for j in cellsDeLte.net.unique():
cellsNew.loc[i,j] = len(cellsDeLte.query('net==@j and createdR < @i and createdR > @start'))
#numberVF= len(cellsDeLte.query('net==2 and createdR < @i and createdR > @start'))
#numberO2= len(cellsDeLte.query('net==3 and createdR < @i and createdR > @start'))
#cellsNew.loc[i]=[numberDT, numberVF, numberO2]
cellsNewConsolidated = cellsNew[cellsNew.index > pd.Timestamp(2018,4,14)]
#towers.plot()
fig, ax = plt.subplots(ncols=3, figsize=(30,8))
towers.plot(ax=ax[0],title='Cell towers per carrier')
#plt.legend(loc='upper left')
towersNew.plot(ax=ax[1], title='New cell towers per week')
towersNewConsolidated.plot(ax=ax[2], title='New cell towers per week (from 2018-4)')
plt.show()
#towers.plot()
fig, ax = plt.subplots(ncols=3, figsize=(30,8))
n_cells.plot(ax=ax[0],title='Cells per carrier')
#plt.legend(loc='upper left')
cellsNew.plot(ax=ax[1], title='New cells per week')
cellsNewConsolidated.plot(ax=ax[2], title='New cells per week (from 2018-4)')
plt.show()
The distribution of cell towers between carriers shows as expected.
Over the last year, o2 has been closing the gap to Vodafone.
Telekom is the carrier with the most sites.
However, their conversion to single RAN is not completed yet, which means that different frequencies broadcasted from the same tower might have different eNB ids.
Over the last year, O2 has consistently been the provider with the most newest sites per week, ranging from around 50 to 100 sites. Due to the O2 E-Plus merger, o2 changed its network id to 262-3 which lead to the sudden increase of O2 sites between July 2017 and April 2018
When not looking at the towers but at the cells, O2 actually surpassed Vodafone in late 2018. Here, the numbers of all three providers are comparable, as one single cell is always counted as one.
o2B1 = cellsDeLte.query('net==3 and samples>3 and sector>30 and sector<40 and cell>100000').groupby('eNB').median()
#o2B1.loc[:,'updatedP'] = pd.to_datetime(o2B1['updated'], unit="s")
o2B1['id'] = o2B1.index.values
#o2B1['createdR'] = pd.to_datetime(o2B1.created, unit="s")
Now, lets plot a map for each carrier. O2 is blue, Vodafone red and Telekom magenta. The maps are interactive and can be zoomed and moved.
from bokeh.io import output_file, show, output_notebook
from bokeh.models import ColumnDataSource, GMapOptions, HoverTool, ColorBar
from bokeh.plotting import gmap, figure
from bokeh.tile_providers import STAMEN_TONER
from bokeh.layouts import row
from bokeh.transform import linear_cmap
from bokeh.palettes import Inferno
#output_file("all.html")
output_notebook()
#map_options = GMapOptions(lat=51.3, lng=10.5, map_type="roadmap", zoom=7)
# For GMaps to function, Google requires you obtain and enable an API key:
#
# https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Replace the value below with your personal API key:
def createMap(dataSource, colour, hexbin=True, size=3, mapCreate=False):
mapTemp = figure(x_range=(700000, 1600000), y_range=(6000000, 7300000),
x_axis_type="mercator", y_axis_type="mercator", plot_height=800, plot_width = 700)
mapTemp.add_tile(STAMEN_TONER)
#dataSourceConv = addWebMercartor(dataSource)
sourceTemp = ColumnDataSource(dataSource)
if hexbin:
r, bins = mapTemp.hexbin(dataSource['lon6'].values, dataSource['lat6'].values, size=20000, hover_color="pink", hover_alpha=0.6, fill_alpha=0.1)
#bokeh_hexbin(mapTemp, dataSourceConv['lon6'].values, dataSourceConv['lat6'].values, bins=10)
circles = None
if mapCreate:
mapper = linear_cmap(field_name='created', palette=Inferno[10] ,low=min(dataSource.created) ,high=max(dataSource.created))
circles = mapTemp.circle(x="lon6", y="lat6", size=size, fill_color=mapper, fill_alpha=0.8, source=sourceTemp, line_alpha=0, line_width=10)
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0))
mapTemp.add_layout(color_bar, 'right')
else:
circles = mapTemp.circle(x="lon6", y="lat6", size=size, fill_color=colour, fill_alpha=0.8, source=sourceTemp, line_alpha=0, line_width=10)
hover = HoverTool()
hover.tooltips=[('CellID', '@id'),('Samples', '@samples'), ('Created', '@createdR{%F}'),('Changed','@updated{%F}')]
hover.formatters={'updated': 'datetime', 'createdR': 'datetime'}
hover.renderers=[circles]
if hexbin:
hover2 = HoverTool(
tooltips=[("count", "@c"), ("(q,r)", "(@q, @r)")],
mode="mouse", point_policy="follow_mouse", renderers=[r])
mapTemp.add_tools(hover2, hover)
return mapTemp, bins
else:
mapTemp.add_tools(hover)
return mapTemp, None
o2map, o2hex = createMap(o2, "blue",True)
dtmap, dthex = createMap(dt, "magenta",True)
vfmap, vfhex = createMap(vf, "red",True)
show(row(o2map, dtmap, vfmap))
o2mapC, o2hexC = createMap(cellsDeLte.query('net==3'), "blue",True)
dtmapC, dthexC = createMap(cellsDeLte.query('net==1'), "magenta",True)
vfmapC, vfhexC = createMap(cellsDeLte.query('net==2'), "red",True)
#show(row(o2mapC, dtmapC, vfmapC))
# Plot new towers since dat
dat = datetime.datetime(2019,1,1)
newmap3, _ = createMap(o2.query('createdR>@dat'), "blue", False,5,True)
newmap1, _ = createMap(dt.query('createdR>@dat'), "blue", False,5,True)
newmap2, _ = createMap(vf.query('createdR>@dat'), "blue", False,5,True)
show(row(newmap3, newmap1, newmap2))
In the above plot, all new sites this year are plotted (O2, Telekom and Vodafone in this order). The dots are coloured by date of creation (lighter is newer). O2 seems to activate many sites in rural areas, where they are still rather weak compared to the other providers.
Recently, o2 has been rolling out LTE in band 1. The map below shows the state of this rollout. Cells are colored by creation date.
b1, _ = createMap(o2B1, "blue", False,10,mapCreate=True)
show(b1)
fig, ax = plt.subplots(ncols=3, figsize=(32,10))
#plt.figure(figsize=(12,10))
im = ax[0].hexbin(dt.lon6, dt.lat6, bins='log', gridsize=40, mincnt=1, vmax=200)
ax[0].axis('off')
ax[0].title.set_text('Telekom.de')
ax[1].hexbin(vf.lon6, vf.lat6, bins='log', gridsize=40, mincnt=1, vmax=200)
ax[1].axis('off')
ax[1].title.set_text('Vodafone.de')
ax[2].hexbin(o2.lon6, o2.lat6, bins='log', gridsize=40, mincnt=1, vmax=200)
ax[2].axis('off')
ax[2].title.set_text('o2-de')
cbar = fig.colorbar(im, ax=ax.ravel().tolist(), shrink=0.95)
#ax[1].colorbar()
plt.show()
In the above plot, the density of the networks is analysed. While the density of Telekom and Vodafone seem to look similar, O2 has large, completely unserved areas (white), while the big cities seem to look similar compared to the other providers.
As seen in the previous plot, there seem to be large differences between the providers. To analyse where these differences come from, we take a closer look at the data, comparing the cell tower distribution over hexagonal bins of a size of roughly 375 km2.
o2hex = pd.DataFrame(o2hex)
vfhex = pd.DataFrame(vfhex)
dthex = pd.DataFrame(dthex)
merge1 = pd.merge(dthex, o2hex, left_on=['q','r'], right_on=['q','r'], how='outer', suffixes=('_dt','_o2'))
merged = pd.merge(merge1, vfhex, left_on=['q','r'], right_on=['q','r'], how='outer', suffixes=('','_vf'))
merged = merged.rename(columns={"counts":"counts_vf"})
merged = merged.query('r<-199').fillna(0)
merged = merged.query('counts_dt+counts_o2+counts_vf>2')
merged['summe'] = merged.counts_dt+merged.counts_vf+merged.counts_o2
merged['perc_dt']= merged.counts_dt/merged.summe
merged['perc_vf']= merged.counts_vf/merged.summe
merged['perc_o2']= merged.counts_o2/merged.summe
# for towers:140
We separate the bins into high density and the rest. The high density class contains bins that are located in the cities. These have a much higher count of sites per bin. Around 10% of the bins are in the cities class.
cities = merged.query('counts_dt+counts_o2+counts_vf>=140')
rest = merged.query('counts_dt+counts_o2+counts_vf<140')
print(len(merged))
print(len(cities))
print(len(rest))
print(rest.sum())
print(cities.sum())
rest.median()
In these non-city cells, the density of towers differs greatly between providers. While O2 has a median of only 5 towers per bin, Vodafone has twice as many towers on median. If we assume a coverage radius of 4 km for every tower and a bin size of 375 km2, we would need around 7-8 towers per hexagon to get close to full coverage. This means that O2 is massively lacking behind the two other providers for rural coverage, where area coverage is important.
If we now look at the number of bins with less than 8 towers for every provider, we can calculate how many new towers every provider needs to reach full coverage. This is probably not completely accurate due to terrain and bins that are only partially covering the coverage area of the german providers.
rural = merged.query('counts_o2<8')
counts = rural.counts_o2.sum()
nhex = len(rural)
print('Missing towers: '+str(nhex*7-counts))
rural = merged.query('counts_vf<8')
counts = rural.counts_o2.sum()
nhex = len(rural)
print('Missing towers: '+str(nhex*7-counts))
rural = merged.query('counts_dt<8')
counts = rural.counts_o2.sum()
nhex = len(rural)
print('Missing towers: '+str(nhex*7-counts))
sns.kdeplot(rest.counts_o2, label='O2')
sns.kdeplot(rest.counts_dt, label='Telekom')
sns.kdeplot(rest.counts_vf, label='Vodafone')
Distribution plot of towers per hexagon for every provider in the non-city areas.
sns.kdeplot(cities.counts_o2, label='O2')
sns.kdeplot(cities.counts_dt, label='Telekom')
sns.kdeplot(cities.counts_vf, label='Vodafone')
Distrubution plot of towers per hexagon in the city areas. In these areas, O2 actually already surpasses Vodafone.
cities.median()
Following the analysis of the rural areas, we now focus on the urban areas. Here, we can individually compare the number of sites for every provider on a bin level.
cities.sort_values(by='perc_dt').head(10)
cities.sort_values(by='perc_vf').head(10)
cities.sort_values(by='perc_o2').head(10)
Cities where O2 performs badly in this metric are in the order of the table Würzburg, Rosdock, Erfurt Regensburg, Koblenz, Ingolstadt, Ulm, Dresden and Augsburg.